Read in 10X Genomics files from HB10hpf, HB13hpf, HB16hpf and HB13hpf_DEAB samples, create SeuratObjects, do QC analysis, filter out poor quality cells and process with Signac scRNA-seq/ATAC-seq pipeline.
set mybin <- miniconda3/bin and mypath <- path to cellranger output files before running
mybin <- "/Users/becky/opt/miniconda3/bin"
mypath <- "~/Documents/Projects/Sagerstrom/scRNAseq_ATACseq_2021_02_09"
suppressPackageStartupMessages({
library(Seurat)
library(dplyr)
library(org.Dr.eg.db)
library(BSgenome.Drerio.UCSC.danRer11)
library(Signac)
library(ggplot2)
library(ggsci)
library(limma)
library(AnnotationHub)
library(reticulate)
library(patchwork)
use_python(paste0(mybin,"/python"))
library(JASPAR2020)
library(TFBSTools)
library(motifmatchr)
})
## Warning: package 'AnnotationDbi' was built under R version 4.1.1
## Warning: package 'BiocGenerics' was built under R version 4.1.1
## Warning: package 'Biobase' was built under R version 4.1.1
## Warning: package 'IRanges' was built under R version 4.1.1
## Warning: package 'S4Vectors' was built under R version 4.1.3
## Warning: package 'BSgenome' was built under R version 4.1.1
## Warning: package 'GenomeInfoDb' was built under R version 4.1.1
## Warning: package 'GenomicRanges' was built under R version 4.1.1
## Warning: package 'Biostrings' was built under R version 4.1.1
## Warning: package 'XVector' was built under R version 4.1.1
## Warning: package 'rtracklayer' was built under R version 4.1.1
## Warning: package 'limma' was built under R version 4.1.3
## Warning: package 'AnnotationHub' was built under R version 4.1.1
## Warning: package 'BiocFileCache' was built under R version 4.1.1
## Warning: package 'TFBSTools' was built under R version 4.1.1
## Warning: package 'motifmatchr' was built under R version 4.1.1
options(future.globals.maxSize = 4000 * 1024^2)
ah = AnnotationHub()
## snapshotDate(): 2021-10-20
GRCz11.v99.EnsDb = ah[["AH78759"]] ## cellranger_arc_genomes/zebrafishPlusGFP built on GRCz11.v99.gtf
## loading from cache
## require("ensembldb")
## Warning: package 'ensembldb' was built under R version 4.1.1
## Warning: package 'GenomicFeatures' was built under R version 4.1.1
## Warning: package 'AnnotationFilter' was built under R version 4.1.1
#GRCz11.v103.EnsDb <- ah[["AH89399"]]
mypal <- pal_igv(palette = "default",alpha = 1)(30)
SeuratFromMatrixh5 <- function(inputdata, annotation, frag){
# the 10x hdf5 file contains both data types
rna_counts <- inputdata$`Gene Expression`
atac_counts <- inputdata$Peaks
seurat <- CreateSeuratObject(counts = rna_counts, assay = "RNA")
seurat[["percent.mt"]] <- PercentageFeatureSet(seurat, pattern = "^mt-")
seurat[["ATAC"]] <- CreateChromatinAssay(
counts = atac_counts,
sep = c(":", "-"),
fragments = frag,
annotation = annotation)
return(seurat)
}
GetQCplots <- function(seurat, mitoPattern){
seurat[["percent.mt"]] <- PercentageFeatureSet(seurat, pattern = mitoPattern)
v <- VlnPlot(seurat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
f1 <- FeatureScatter(seurat, feature1 = "nCount_RNA", feature2 = "percent.mt")
f2 <- FeatureScatter(seurat, feature1 = "nFeature_RNA", feature2 = "percent.mt")
f3 <- FeatureScatter(seurat, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
qc1 <- ggplot(seurat@meta.data, aes(x=nCount_RNA, y=nFeature_RNA, color=percent.mt)) + geom_point(size=0.1) +
scale_color_gradient(low="blue",high="red") + theme_classic()
qc2 <- ggplot(seurat@meta.data, aes(x=nCount_RNA, y=percent.mt)) + geom_point(size=0.1) + scale_x_log10() +
geom_density2d()
qc3 <- ggplot(seurat@meta.data, aes(x=nCount_RNA, y=nFeature_RNA ,color=percent.mt)) + geom_point(size=0.1) +
scale_x_log10() + scale_y_log10() + geom_density2d() +
scale_color_gradient(low="gray",high="darkblue") + theme_classic()
print(v)
print(f1)
print(f2)
print(f3)
print(qc1)
print(qc2)
print(qc3)
return(seurat)
}
GetATACplots <- function(seurat){
DefaultAssay(seurat) <- "ATAC"
seurat <- NucleosomeSignal(seurat)
seurat <- TSSEnrichment(seurat)
v1 <- VlnPlot(
object = seurat,
features = "nCount_ATAC",
pt.size = 0
) + NoLegend()
v2 <- VlnPlot(
object = seurat,
features = "nCount_ATAC",
y.max = 100000,
pt.size = 0
) + NoLegend()
v3 <- VlnPlot(
object = seurat,
features = "nCount_ATAC",
y.max = 5000,
pt.size = 0
) + NoLegend()
v4 <- VlnPlot(
object = seurat,
features = "TSS.enrichment",
pt.size = 0
) + NoLegend()
v5 <- VlnPlot(
object = seurat,
features = "TSS.enrichment",
y.max = 10,
pt.size = 0
) + NoLegend()
v6 <- VlnPlot(
object = seurat,
features = "nucleosome_signal",
pt.size = 0
) + NoLegend()
v7 <- VlnPlot(
object = seurat,
features = "nucleosome_signal",
y.max = 2,
pt.size = 0
) + NoLegend()
print(v1 + v2 + v3 + plot_layout(ncol = 3))
print(v4 + v5 + plot_layout(ncol = 2))
print(v6 + v7 + plot_layout(ncol = 2))
return(seurat)
}
CreatePeaksWithFilter <- function(seurat, frag, blacklist, annotation){
peaks <- CallPeaks(seurat, macs2.path = paste0(mybin,"/macs2")) ## note this has changed so needs SeuratOjbect not fragment file
len.ATAC <- length(seurat@assays$ATAC@ranges)
len.peaks <- length(peaks)
peaks <- keepStandardChromosomes(peaks, pruning.mode = "coarse")
len.std <- length(peaks)
peaks <- subsetByOverlaps(x = peaks, ranges = blacklist, invert = TRUE)
len.bl <- length(peaks)
ChrEnds <- GRanges(seqnames = annotation@seqinfo@seqnames,
ranges = IRanges(start = annotation@seqinfo@seqlengths - 300,
end = annotation@seqinfo@seqlengths))
ChrStarts <- GRanges(seqnames = annotation@seqinfo@seqnames,
ranges = IRanges(start = rep(1,26), end = rep(300,26)))
RemoveChrEndsChrStarts <- c(ChrEnds,ChrStarts)
peaks <- subsetByOverlaps(x = peaks, ranges = RemoveChrEndsChrStarts, invert = TRUE)
len.rends <- length(peaks)
cat("Number original ATAC-seq peaks: ",len.ATAC,"\n")
cat("Number macs2 called ATAC-seq peaks: ",len.peaks,"\n")
cat("Number macs2 ATAC-seq peaks with Standard Chromosomes: ",len.std,"\n")
cat("Number macs2 ATAC-seq peaks after blacklist filter: ",len.bl,"\n")
cat("Number macs2 ATAC-seq peaks after 300bp end filter: ",len.rends,"\n")
macs2_counts <- FeatureMatrix(
fragments = Fragments(seurat),
features = peaks,
cells = colnames(seurat)
)
seurat[["peaks"]] <- CreateChromatinAssay(
counts = macs2_counts,
fragments = frag,
annotation = annotation,
genome = 'GRCz11'
)
DefaultAssay(seurat) <- "peaks"
seurat <- NucleosomeSignal(seurat)
seurat <- TSSEnrichment(seurat)
return(seurat)
}
GetUMAPandClusters <- function(seurat){
# RNA analysis
DefaultAssay(seurat) <- "RNA"
seurat <- SCTransform(seurat, verbose = FALSE, return.only.var.genes = FALSE)
seurat <- RunPCA(seurat)
ElbowPlot(seurat,ndims = 50)
seurat <- RunUMAP(seurat, dims = 1:50, reduction.name = 'umap.rna', reduction.key = 'rnaUMAP_')
# ATAC analysis
# We exclude the first dimension as this is typically correlated with sequencing depth
DefaultAssay(seurat) <- "peaks"
seurat <- FindTopFeatures(seurat, min.cutoff = 5)
seurat <- RunTFIDF(seurat)
seurat <- RunSVD(seurat)
seurat <- RunUMAP(seurat, reduction = 'lsi', dims = 2:50, reduction.name = "umap.atac", reduction.key = "atacUMAP_")
DefaultAssay(seurat) <- "SCT"
seurat <- FindMultiModalNeighbors(seurat, reduction.list = list("pca", "lsi"), dims.list = list(1:50, 2:50))
seurat <- RunUMAP(seurat, nn.name = "weighted.nn", reduction.name = "wnn.umap", reduction.key = "wnnUMAP_", assay = "RNA")
seurat <- FindClusters(seurat, graph.name = "wsnn", algorithm = 3, verbose = FALSE)
seurat <- FindClusters(seurat, graph.name = "wsnn", algorithm = 3, verbose = FALSE, resolution = 1.0)
seurat <- FindClusters(seurat, graph.name = "wsnn", algorithm = 3, verbose = FALSE, resolution = 1.5)
seurat <- FindClusters(seurat, graph.name = "wsnn", algorithm = 3, verbose = FALSE, resolution = 2.0)
seurat <- FindClusters(seurat, graph.name = "wsnn", algorithm = 3, verbose = FALSE, resolution = 2.5)
seurat <- FindClusters(seurat, graph.name = "wsnn", algorithm = 3, verbose = FALSE, resolution = 3.0)
seurat <- FindClusters(seurat, graph.name = "wsnn", algorithm = 3, verbose = FALSE, resolution = 3.5)
p1 <- DimPlot(seurat, reduction = "umap.rna", label = TRUE, label.size = 5, repel = TRUE, group.by = "wsnn_res.0.8") + ggtitle("RNA") + scale_color_igv()
p2 <- DimPlot(seurat, reduction = "umap.atac", label = TRUE, label.size = 5, repel = TRUE, group.by = "wsnn_res.0.8") + ggtitle("ATAC") + scale_color_igv()
p3 <- DimPlot(seurat, reduction = "wnn.umap", label = TRUE, label.size = 5, repel = TRUE, group.by = "wsnn_res.0.8") + ggtitle("WNN") + scale_color_igv()
print(p1 + p2 + p3 & NoLegend() & theme(plot.title = element_text(hjust = 0.5)))
return(seurat)
}
Use same annotation for each sample
# get gene annotations for GRCz11
annotation <- GetGRangesFromEnsDb(ensdb = GRCz11.v99.EnsDb, verbose = FALSE)
seqlevelsStyle(annotation) <- "UCSC"
genome(annotation) <- "GRCz11"
inputdata.10 <- Read10X_h5(paste0(mypath,"/HB10hpf_analyses/HB10hpf/filtered_feature_bc_matrix.h5"))
## Warning in sparseMatrix(i = indices[] + 1, p = indptr[], x = as.numeric(x =
## counts[]), : 'giveCsparse' has been deprecated; setting 'repr = "T"' for you
## Genome matrix has multiple modalities, returning a list of matrices for this genome
frag.file.10 <- paste0(mypath,"/HB10hpf_analyses/HB10hpf/atac_fragments.tsv.gz")
HB10hpf <- SeuratFromMatrixh5(inputdata.10, annotation, frag.file.10)
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Computing hash
HB10hpf
## An object of class Seurat
## 171836 features across 2096 samples within 2 assays
## Active assay: RNA (25108 features, 0 variable features)
## 1 other assay present: ATAC
inputdata.13 <- Read10X_h5(paste0(mypath,"/HB13hpf/filtered_feature_bc_matrix.h5"))
## Warning in sparseMatrix(i = indices[] + 1, p = indptr[], x = as.numeric(x =
## counts[]), : 'giveCsparse' has been deprecated; setting 'repr = "T"' for you
## Genome matrix has multiple modalities, returning a list of matrices for this genome
frag.file.13 <- paste0(mypath,"/HB13hpf/atac_fragemnts_chr.tsv.gz")
HB13hpf <- SeuratFromMatrixh5(inputdata.13, annotation, frag.file.13)
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Computing hash
HB13hpf
## An object of class Seurat
## 170988 features across 2047 samples within 2 assays
## Active assay: RNA (25108 features, 0 variable features)
## 1 other assay present: ATAC
inputdata.16 <- Read10X_h5(paste0(mypath,"/HB16hpf_analyses/HB16hpf/filtered_feature_bc_matrix.h5"))
## Warning in sparseMatrix(i = indices[] + 1, p = indptr[], x = as.numeric(x =
## counts[]), : 'giveCsparse' has been deprecated; setting 'repr = "T"' for you
## Genome matrix has multiple modalities, returning a list of matrices for this genome
frag.file.16 <- paste0(mypath,"/HB16hpf_analyses/HB16hpf/atac_fragments.tsv.gz")
HB16hpf <- SeuratFromMatrixh5(inputdata.16, annotation, frag.file.16)
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Computing hash
HB16hpf
## An object of class Seurat
## 181937 features across 2157 samples within 2 assays
## Active assay: RNA (25108 features, 0 variable features)
## 1 other assay present: ATAC
inputdata.13deab <- Read10X_h5(paste0(mypath,"/HB13hpf_DEAB_analyses/HB13hpf_DEAB/filtered_feature_bc_matrix.h5"))
## Warning in sparseMatrix(i = indices[] + 1, p = indptr[], x = as.numeric(x =
## counts[]), : 'giveCsparse' has been deprecated; setting 'repr = "T"' for you
## Genome matrix has multiple modalities, returning a list of matrices for this genome
frag.file.13deab <- paste0(mypath,"/HB13hpf_DEAB_analyses/HB13hpf_DEAB/atac_fragments.tsv.gz")
HB13hpf_DEAB <- SeuratFromMatrixh5(inputdata.13deab, annotation, frag.file.13deab)
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Computing hash
HB13hpf_DEAB
## An object of class Seurat
## 194043 features across 3017 samples within 2 assays
## Active assay: RNA (25108 features, 0 variable features)
## 1 other assay present: ATAC
HB10hpf <- GetQCplots(HB10hpf, "^mt-")
HB13hpf <- GetQCplots(HB13hpf, "^mt-")
HB16hpf <- GetQCplots(HB16hpf, "^mt-")
HB13hpf_DEAB <- GetQCplots(HB13hpf_DEAB, "^mt-")
HB10hpf <- GetATACplots(HB10hpf)
## Extracting TSS positions
## Extracting fragments at TSSs
##
## Computing TSS enrichment score
## Warning: Removed 9 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1247 rows containing non-finite values (stat_ydensity).
## Warning: Removed 45 rows containing non-finite values (stat_ydensity).
### 4.2.2 HB13hpf
HB13hpf <- GetATACplots(HB13hpf)
## Extracting TSS positions
## Extracting fragments at TSSs
##
## Computing TSS enrichment score
## Warning: Removed 55 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1755 rows containing non-finite values (stat_ydensity).
## Warning: Removed 2 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1 rows containing non-finite values (stat_ydensity).
### 4.2.3 HB16hpf
HB16hpf <- GetATACplots(HB16hpf)
## Extracting TSS positions
## Extracting fragments at TSSs
##
## Computing TSS enrichment score
## Warning: Removed 8 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1579 rows containing non-finite values (stat_ydensity).
## Warning: Removed 18 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1 rows containing non-finite values (stat_ydensity).
### 4.2.4 HB13hpf_DEAB
HB13hpf_DEAB <- GetATACplots(HB13hpf_DEAB)
## Extracting TSS positions
## Extracting fragments at TSSs
##
## Computing TSS enrichment score
## Warning: Removed 1 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1778 rows containing non-finite values (stat_ydensity).
## Warning: Removed 55 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1 rows containing non-finite values (stat_ydensity).
cells.orig <- table(HB10hpf@meta.data$orig.ident)[[1]]
HB10hpf <- subset(x = HB10hpf,
subset = nCount_ATAC < 100000 & nCount_ATAC > 1000)
cells.atac <- table(HB10hpf@meta.data$orig.ident)[[1]]
HB10hpf <- subset(x = HB10hpf,
subset = nCount_RNA < 20000 & nCount_RNA > 500)
cells.rna <- table(HB10hpf@meta.data$orig.ident)[[1]]
HB10hpf <- subset(x = HB10hpf,
subset =percent.mt < 5)
cells.mt <- table(HB10hpf@meta.data$orig.ident)[[1]]
HB10hpf <- subset(x = HB10hpf,
subset = nucleosome_signal < 2 )
cells.nuc <- table(HB10hpf@meta.data$orig.ident)[[1]]
HB10hpf <- subset(x = HB10hpf,
subset = TSS.enrichment >1)
cells.tss <- table(HB10hpf@meta.data$orig.ident)[[1]]
cat("HB10hpf:","\n")
## HB10hpf:
cat("cells in original sample: ",cells.orig,"\n")
## cells in original sample: 2096
cat("cells after ATAC < 10000 & ATAC > 1000: ",cells.atac,"\n")
## cells after ATAC < 10000 & ATAC > 1000: 1759
cat("cells after RNA < 20000 and RNA > 500: ",cells.rna,"\n")
## cells after RNA < 20000 and RNA > 500: 1714
cat("cells after percent.mt < 5: ",cells.mt,"\n")
## cells after percent.mt < 5: 1654
cat("cells after nucleosome < 2: ",cells.nuc,"\n")
## cells after nucleosome < 2: 1654
cat("cells after TSS enrichment > 1",cells.tss,"\n")
## cells after TSS enrichment > 1 1654
cells.orig <- table(HB13hpf@meta.data$orig.ident)[[1]]
HB13hpf <- subset(x = HB13hpf,
subset = nCount_ATAC < 100000 & nCount_ATAC > 1000)
cells.atac <- table(HB13hpf@meta.data$orig.ident)[[1]]
HB13hpf <- subset(x = HB13hpf,
subset = nCount_RNA < 20000 & nCount_RNA > 500)
cells.rna <- table(HB13hpf@meta.data$orig.ident)[[1]]
HB13hpf <- subset(x = HB13hpf,
subset =percent.mt < 5)
cells.mt <- table(HB13hpf@meta.data$orig.ident)[[1]]
HB13hpf <- subset(x = HB13hpf,
subset = nucleosome_signal < 2 )
cells.nuc <- table(HB13hpf@meta.data$orig.ident)[[1]]
HB13hpf <- subset(x = HB13hpf,
subset = TSS.enrichment >1)
cells.tss <- table(HB13hpf@meta.data$orig.ident)[[1]]
cat("HB13hpf:","\n")
## HB13hpf:
cat("cells in original sample: ",cells.orig,"\n")
## cells in original sample: 2047
cat("cells after ATAC < 10000 & ATAC > 1000: ",cells.atac,"\n")
## cells after ATAC < 10000 & ATAC > 1000: 1901
cat("cells after RNA < 20000 and RNA > 500: ",cells.rna,"\n")
## cells after RNA < 20000 and RNA > 500: 1862
cat("cells after percent.mt < 5: ",cells.mt,"\n")
## cells after percent.mt < 5: 1828
cat("cells after nucleosome < 2: ",cells.nuc,"\n")
## cells after nucleosome < 2: 1828
cat("cells after TSS enrichment > 1",cells.tss,"\n")
## cells after TSS enrichment > 1 1828
cells.orig <- table(HB16hpf@meta.data$orig.ident)[[1]]
HB16hpf <- subset(x = HB16hpf,
subset = nCount_ATAC < 100000 & nCount_ATAC > 1000)
cells.atac <- table(HB16hpf@meta.data$orig.ident)[[1]]
HB16hpf <- subset(x = HB16hpf,
subset = nCount_RNA < 20000 & nCount_RNA > 500)
cells.rna <- table(HB16hpf@meta.data$orig.ident)[[1]]
HB16hpf <- subset(x = HB16hpf,
subset =percent.mt < 5)
cells.mt <- table(HB16hpf@meta.data$orig.ident)[[1]]
HB16hpf <- subset(x = HB16hpf,
subset = nucleosome_signal < 2 )
cells.nuc <- table(HB16hpf@meta.data$orig.ident)[[1]]
HB16hpf <- subset(x = HB16hpf,
subset = TSS.enrichment >1)
cells.tss <- table(HB16hpf@meta.data$orig.ident)[[1]]
cat("HB16hpf:","\n")
## HB16hpf:
cat("cells in original sample: ",cells.orig,"\n")
## cells in original sample: 2157
cat("cells after ATAC < 10000 & ATAC > 1000: ",cells.atac,"\n")
## cells after ATAC < 10000 & ATAC > 1000: 1909
cat("cells after RNA < 20000 and RNA > 500: ",cells.rna,"\n")
## cells after RNA < 20000 and RNA > 500: 1885
cat("cells after percent.mt < 5: ",cells.mt,"\n")
## cells after percent.mt < 5: 1750
cat("cells after nucleosome < 2: ",cells.nuc,"\n")
## cells after nucleosome < 2: 1750
cat("cells after TSS enrichment > 1",cells.tss,"\n")
## cells after TSS enrichment > 1 1750
cells.orig <- table(HB13hpf_DEAB@meta.data$orig.ident)[[1]]
HB13hpf_DEAB <- subset(x = HB13hpf_DEAB,
subset = nCount_ATAC < 100000 & nCount_ATAC > 1000)
cells.atac <- table(HB13hpf_DEAB@meta.data$orig.ident)[[1]]
HB13hpf_DEAB <- subset(x = HB13hpf_DEAB,
subset = nCount_RNA < 20000 & nCount_RNA > 500)
cells.rna <- table(HB13hpf_DEAB@meta.data$orig.ident)[[1]]
HB13hpf_DEAB <- subset(x = HB13hpf_DEAB,
subset =percent.mt < 5)
cells.mt <- table(HB13hpf_DEAB@meta.data$orig.ident)[[1]]
HB13hpf_DEAB <- subset(x = HB13hpf_DEAB,
subset = nucleosome_signal < 2 )
cells.nuc <- table(HB13hpf_DEAB@meta.data$orig.ident)[[1]]
HB13hpf_DEAB <- subset(x = HB13hpf_DEAB,
subset = TSS.enrichment >1)
cells.tss <- table(HB13hpf_DEAB@meta.data$orig.ident)[[1]]
cat("HB13hpf_DEAB:","\n")
## HB13hpf_DEAB:
cat("cells in original sample: ",cells.orig,"\n")
## cells in original sample: 3017
cat("cells after ATAC < 10000 & ATAC > 1000: ",cells.atac,"\n")
## cells after ATAC < 10000 & ATAC > 1000: 2523
cat("cells after RNA < 20000 and RNA > 500: ",cells.rna,"\n")
## cells after RNA < 20000 and RNA > 500: 2470
cat("cells after percent.mt < 5: ",cells.mt,"\n")
## cells after percent.mt < 5: 2423
cat("cells after nucleosome < 2: ",cells.nuc,"\n")
## cells after nucleosome < 2: 2423
cat("cells after TSS enrichment > 1",cells.tss,"\n")
## cells after TSS enrichment > 1 2423
Per recommendation from Signac tutorial call ATAC peaks with macs2 and put in assay “peaks”
Found GRCz10 blacklist in Yang, H., Luan, Y., Liu, T. et al. A map of cis-regulatory elements and 3D genome structures in zebrafish. Nature 588, 337–343 (2020). https://doi.org/10.1038/s41586-020-2962-9 Supplementary table 19: Supplemental Table 19. ChIP-seq black list in the zebrafish genome.xlsx
### code used to generate blacklist
# library(readxl)
# library(GenomicRanges)
# library(plyranges)
# GRCz10.blacklist <- read_excel("Supplemental_Table_19_ChIPseq_black_list_in_the_zebrafish_genome.xlsx", col_names = F)
# names(GRCz10.blacklist) <- c("seqnames","start","end")
# head(GRCz10.blacklist)
# GRCz10.blacklist <- as_granges(GRCz10.blacklist)
# head(GRCz10.blacklist)
# chain <- import.chain("danRer10ToDanRer11.over.chain")
# seqlevelsStyle(GRCz10.blacklist) = "UCSC" # necessary
# GRCz11.blacklist = liftOver(GRCz10.blacklist, chain)
# class(GRCz11.blacklist)
# GRCz11.blacklist = unlist(GRCz11.blacklist)
# genome(GRCz11.blacklist) = "GRCz11"
# GRCz11.blacklist
# saveRDS(GRCz11.blacklist, file = "GRCz11.blacklist.granges.RDS")
GRCz11.blacklist <- readRDS(file = "~/Documents/Projects/R_General/GRCz11.blacklist.granges.RDS")
Call peaks with macs2 and filter with blacklist and filter out peaks in 300bp start and end of each chromosome which causes errors in Signac in downstream analyses
HB10hpf <- CreatePeaksWithFilter(HB10hpf, frag.file.10, GRCz11.blacklist, annotation)
## Number original ATAC-seq peaks: 146728
## Number macs2 called ATAC-seq peaks: 143998
## Number macs2 ATAC-seq peaks with Standard Chromosomes: 143998
## Number macs2 ATAC-seq peaks after blacklist filter: 143271
## Number macs2 ATAC-seq peaks after 300bp end filter: 143264
## Extracting reads overlapping genomic regions
## Computing hash
## Extracting TSS positions
## Extracting fragments at TSSs
##
## Computing TSS enrichment score
HB13hpf <- CreatePeaksWithFilter(HB13hpf, frag.file.13, GRCz11.blacklist, annotation)
## Number original ATAC-seq peaks: 145880
## Number macs2 called ATAC-seq peaks: 195593
## Number macs2 ATAC-seq peaks with Standard Chromosomes: 195592
## Number macs2 ATAC-seq peaks after blacklist filter: 194864
## Number macs2 ATAC-seq peaks after 300bp end filter: 194853
## Extracting reads overlapping genomic regions
## Computing hash
## Extracting TSS positions
## Extracting fragments at TSSs
##
## Computing TSS enrichment score
HB16hpf <- CreatePeaksWithFilter(HB16hpf, frag.file.16, GRCz11.blacklist, annotation)
## Number original ATAC-seq peaks: 156829
## Number macs2 called ATAC-seq peaks: 159610
## Number macs2 ATAC-seq peaks with Standard Chromosomes: 159609
## Number macs2 ATAC-seq peaks after blacklist filter: 158889
## Number macs2 ATAC-seq peaks after 300bp end filter: 158881
## Extracting reads overlapping genomic regions
## Computing hash
## Extracting TSS positions
## Extracting fragments at TSSs
##
## Computing TSS enrichment score
HB13hpf_DEAB <- CreatePeaksWithFilter(HB13hpf_DEAB, frag.file.13deab, GRCz11.blacklist, annotation)
## Number original ATAC-seq peaks: 168935
## Number macs2 called ATAC-seq peaks: 164414
## Number macs2 ATAC-seq peaks with Standard Chromosomes: 164413
## Number macs2 ATAC-seq peaks after blacklist filter: 163710
## Number macs2 ATAC-seq peaks after 300bp end filter: 163703
## Extracting reads overlapping genomic regions
## Computing hash
## Extracting TSS positions
## Extracting fragments at TSSs
##
## Computing TSS enrichment score
HB10hpf <- GetUMAPandClusters(HB10hpf)
## PC_ 1
## Positive: lama5, tp63, plekha6, pcdh19, bcam, pls3, tfap2c, itga3b, niban2a, alcama
## fermt1, myh9a, rpe65b, fn1a, fat2, gata2a, grhl2b, cdh1, lamb1a, smad6b
## bmp7a, ahnak, si:ch211-199g17.2, itga6b, dok4, znf385a, cldni, cnksr1, samd10a, fmo5
## Negative: hoxc3a, zeb1b, nova2, XKR4, ncam1a, tenm4, pappab, cntfr, cdh2, rfx4
## mllt3, zbtb16a, prickle2b, trps1, sox5, rnf220a, adgrv1, pax3a, ptprn2, efnb2b
## ntn1a, plxdc2, sulf1, hs3st3b1b, gli2a, lrp2a, plch1, sox13, ek1, zeb2a
## PC_ 2
## Positive: hoxc3a, XKR4, mllt3, fn1b, tbx16, phc2a, sulf1, wnt5b, arid3c, serpinh1b
## rbm38, hoxc6b, msgn1, il17rd, si:dkey-261h17.1, fndc3ba, kif26ab, lef1, unc45b, samd10b
## evx1, pcdh8, her1, hoxb7a, lpar1, raraa, tbx16l, ttn.1, lhfpl6, greb1
## Negative: ncam1a, gli3, nova2, cntfr, zbtb16b, col18a1a, pappab, ptprn2, epha7, plxdc2
## nr2f5, ror1, ek1, sox13, adgrv1, pax3a, rfx4, greb1l, foxp4, pkdcca
## zfhx4, tox, celsr1b, zeb1b, pcdh19, hs3st3b1b, six7, ZNF423, msi1, plch1
## PC_ 3
## Positive: hoxc3a, col18a1a, XKR4, nradd, pcdh19, dag1, greb1, hoxc6b, raraa, phc2a
## tp63, evx1, aopep, bcar3, mllt3, cdx4, hoxb3a, sulf1, kif26ab, lef1
## chrd, alcamb, arid3c, cadm1a, pax3a, RIMBP2, hoxb7a, rfx4, hoxd3a, zgc:110158
## Negative: slit3, rrbp1b, snd1, st6gal2a, dennd4a, eya1, tram1, zgc:174153, calr3b, si:dkey-239j18.2
## quo, zbtb46, si:dkey-26g8.5, ctslb.2, si:dkey-269i1.4.1, zgc:174855, sec24d, ctslb, fam214a, pitx2
## zgc:174154, fkbp11, si:ch211-163l21.8, si:dkey-269i1.4, sorl1, frzb, ctslb.1, camk1b, he1.2, plpp1a
## PC_ 4
## Positive: plxna2, lhfpl6, plod2, fgfrl1a, fbn2b, serpinh1b, si:dkey-22o22.2, fstl1a, ntd5, ttn.2
## epha4a, ttn.1, cxcl12b, col11a1a, mtss1lb, si:dkey-261h17.1, bmpr1ba, loxl3b, fstl1b, rbm24a
## unc45b, zcchc24, enah, caskin1, aplp2, hmcn1, lrig1, arhgap24, qkia, agrn
## Negative: hoxc3a, mllt3, plxna3, zgc:174855, ctslb, si:dkey-239j18.2, si:dkey-269i1.4.1, nradd, si:ch211-163l21.8, ctslb.2
## aopep, zgc:174154, si:dkey-26g8.5, zgc:174153, camk1b, calr3b, he1.2, si:dkey-269i1.4, ctslb.1, quo
## adamts18, pcdh1b, stx3a, samd7, zbtb46, pkib, frzb, he1.1, klf17, bhlha15
## PC_ 5
## Positive: unc45b, zbtb16a, tenm4, ttn.1, fn1b, samd10b, six7, msgn1, sox5, tbx16
## pcdh8, shisa2a, nr2f5, pde4cb, mef2d, hivep2a, zbtb16b, gli2a, lhx2b, hsp90aa1.1
## draxin, ek1, crb2a, large2, zeb1b, tbx6, ptprn2, fgfrl1a, vcana, ANOS1
## Negative: col11a1a, cdh11, mtss1lb, slit3, sema5bb, fndc3ba, hmcn1, gdf11, ddr1, rnf220a
## zgc:158328, nradd, prickle2b, plod2, si:dkey-261h17.1, ppp1r14c, arhgef10, serpinh1b, pmp22b, col2a1a
## loxl3b, hoxc3a, aopep, p4ha1b, col4a6, epha2a, agrn, kif26ab, caskin1, lypd6b
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 11:13:23 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:13:23 Read 1654 rows and found 50 numeric columns
## 11:13:23 Using Annoy for neighbor search, n_neighbors = 30
## 11:13:23 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:13:23 Writing NN index file to temp file /var/folders/85/cmygnxr155gdnf4bc3_30jyh0000gp/T//RtmpC4xO1n/file10e54383e04c6
## 11:13:23 Searching Annoy index using 1 thread, search_k = 3000
## 11:13:23 Annoy recall = 100%
## 11:13:24 Commencing smooth kNN distance calibration using 1 thread
## 11:13:27 Initializing from normalized Laplacian + noise
## 11:13:27 Commencing optimization for 500 epochs, with 60826 positive edges
## 11:13:31 Optimization finished
## Performing TF-IDF normalization
## Warning in RunTFIDF.default(object = GetAssayData(object = object, slot =
## "counts"), : Some features contain 0 total counts
## Running SVD
## Scaling cell embeddings
## 11:13:58 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:13:58 Read 1654 rows and found 49 numeric columns
## 11:13:58 Using Annoy for neighbor search, n_neighbors = 30
## 11:13:58 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:13:58 Writing NN index file to temp file /var/folders/85/cmygnxr155gdnf4bc3_30jyh0000gp/T//RtmpC4xO1n/file10e546fab8002
## 11:13:58 Searching Annoy index using 1 thread, search_k = 3000
## 11:13:58 Annoy recall = 100%
## 11:14:00 Commencing smooth kNN distance calibration using 1 thread
## 11:14:02 Initializing from normalized Laplacian + noise
## 11:14:02 Commencing optimization for 500 epochs, with 63724 positive edges
## 11:14:05 Optimization finished
## Calculating cell-specific modality weights
## Finding 20 nearest neighbors for each modality.
## Calculating kernel bandwidths
## Finding multimodal neighbors
## Constructing multimodal KNN graph
## Constructing multimodal SNN graph
## 11:14:09 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:14:11 Commencing smooth kNN distance calibration using 1 thread
## 11:14:13 Initializing from normalized Laplacian + noise
## 11:14:13 Commencing optimization for 500 epochs, with 44848 positive edges
## 11:14:17 Optimization finished
HB13hpf <- GetUMAPandClusters(HB13hpf)
## PC_ 1
## Positive: nova2, ncam1a, rfx4, cntfr, zbtb16b, pvrl2l, negr1, gli3, msi1, plch1
## plxdc2, zbtb16a, adgrv1, tet3, sema5ba, lrp2a, blmh, cadm1a, plekhh1, nat8l
## tenm4, crb2a, ncam1b, mbd3b, cgnl1, plp1a, cldn5a, epha4b, qkib, chl1a
## Negative: ttn.2, ttn.1, ahnak, unc45b, rbm24a, efemp2a, alcama, fat2, flncb, arid3c
## hsp90aa1.1, p3h2, fgfrl1a, eya1, rbfox1l, bcam, meox1, si:dkey-261h17.1, lama5, cdh1
## col5a1, fhl3a, kcnq5a, lsp1a, thbs4b, hey1, itga9, plxna2, emp2, efnb2a
## PC_ 2
## Positive: ttn.2, ttn.1, unc45b, efemp2a, hsp90aa1.1, rbm24a, flncb, meox1, rbfox1l, megf10
## efemp2b, enah, kcnq5a, zgc:92429, thbs4b, fgfrl1a, gli2a, cd81a, ryr1b, optc
## si:ch1073-268j14.1, col5a2a, si:ch211-131k2.3, plxna2, col4a2, ptk2ab, fam49al, kirrel3l, kcnn3, mef2d
## Negative: cdh1, znf385a, bcam, plekha6, col14a1a, lama5, myh9a, cldni, actn1, epcam
## tfap2c, fn1a, cldn7b, tp63, flrt2, nrg2a, lima1a, jupa, flna, zyx
## pawr, grhl2b, lamb1a, niban2a, f11r.1, itga6b, rrbp1b, myh9b, asap2b, fermt1
## PC_ 3
## Positive: cdh1, col14a1a, plekha6, myh9a, cldni, fat2, tfap2c, epcam, tp63, cldn7b
## nrg2a, flrt2, asap2b, bcar3, itga6b, fermt1, f11r.1, grhl2b, tiam1b, krt4
## col18a1a, si:ch211-264f5.6, niban2a, ttn.1, fn1a, palm3, myo9b, ank3a, rpe65b, fn1b
## Negative: slit3, col9a2, col11a1a, mtss1lb, ryr2b, col2a1a, col8a1a, loxl3b, arhgap24, col9a3
## p4ha1b, si:ch211-204c21.1.1, col5a3a, znf385b, caskin1, palmda, shha, si:ch211-200p22.4.1, crim1, FQ323119.1
## FP102018.1, sntb2, ablim2, zmp:0000000760, col9a1b, colgalt2, arrdc3a, serpinh1b, dlg1l, ngef
## PC_ 4
## Positive: ebf3a, kcnab1a, pdgfra, abtb2b, snai1b, zfpm1, prex2, runx1t1, nrp2a, rbpms2a
## adgra2, zcchc24, alcama, sh3pxd2aa, tfap2a, foxn3, rxraa, sox10, colec12, col15a1b
## mtus1b, unc5b, osbpl10b, traf4a, sdk2b, lsp1a, cbfb, tgfbr3, p3h2, cdh11
## Negative: ddr1, ncam1a, actn1, dag1, fat2, zgc:162730, nedd9, myh9a, myo1cb, rfx4
## bcam, ttn.1, chl1a, col2a1a, hsp90aa1.1, cadm1a, cldni, pvrl2l, zyx, lrig1
## nova2, jupa, col5a3a, enah, fras1, rnf220a, p4ha1b, mdka, efemp2a, ptk2ab
## PC_ 5
## Positive: zfpm1, rbpms2a, runx1t1, p3h2, adgra2, fgfr2, anks1b, si:ch211-250c4.4, unc5b, bmp6
## alcama, rassf7a, cntfr, tjp1b, tnnt2a, lama5, crb2a, si:ch73-22o12.1, cdh11, lsp1a
## colec12, podxl, si:dkey-261h17.1, si:dkey-49n23.1, adgrv1, gata5, ttn.2, pcdh10a, cxcl12a, arhgap29a
## Negative: kcnab1a, prex2, XKR4, tfap2a, pdgfra, rxraa, sox10, sox5, sdk2b, osbpl10b
## ednrab, sox6, hoxc3a, nrp2a, stard13b, col16a1, nr2f5, slc1a3a, snai1b, col15a1b
## foxd3, mmp15b, myo10l1, pcdh1b, kalrnb, gas2a, si:dkey-22o22.2, pcca, traf4a, zic2b
## 11:14:53 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:14:53 Read 1828 rows and found 50 numeric columns
## 11:14:53 Using Annoy for neighbor search, n_neighbors = 30
## 11:14:53 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:14:53 Writing NN index file to temp file /var/folders/85/cmygnxr155gdnf4bc3_30jyh0000gp/T//RtmpC4xO1n/file10e542bd04c08
## 11:14:53 Searching Annoy index using 1 thread, search_k = 3000
## 11:14:53 Annoy recall = 100%
## 11:14:55 Commencing smooth kNN distance calibration using 1 thread
## 11:14:57 Initializing from normalized Laplacian + noise
## 11:14:57 Commencing optimization for 500 epochs, with 64946 positive edges
## 11:15:01 Optimization finished
## Performing TF-IDF normalization
## Running SVD
## Scaling cell embeddings
## 11:15:42 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:15:42 Read 1828 rows and found 49 numeric columns
## 11:15:42 Using Annoy for neighbor search, n_neighbors = 30
## 11:15:42 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:15:42 Writing NN index file to temp file /var/folders/85/cmygnxr155gdnf4bc3_30jyh0000gp/T//RtmpC4xO1n/file10e54782fb7b0
## 11:15:42 Searching Annoy index using 1 thread, search_k = 3000
## 11:15:42 Annoy recall = 100%
## 11:15:44 Commencing smooth kNN distance calibration using 1 thread
## 11:15:46 Initializing from normalized Laplacian + noise
## 11:15:46 Commencing optimization for 500 epochs, with 62842 positive edges
## 11:15:50 Optimization finished
## Calculating cell-specific modality weights
## Finding 20 nearest neighbors for each modality.
## Calculating kernel bandwidths
## Finding multimodal neighbors
## Constructing multimodal KNN graph
## Constructing multimodal SNN graph
## 11:15:54 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:15:56 Commencing smooth kNN distance calibration using 1 thread
## 11:15:58 Initializing from normalized Laplacian + noise
## 11:15:58 Commencing optimization for 500 epochs, with 47116 positive edges
## 11:16:02 Optimization finished
HB16hpf <- GetUMAPandClusters(HB16hpf)
## PC_ 1
## Positive: ttn.2, ttn.1, mef2d, rbfox1l, rbm24a, flncb, si:ch1073-268j14.1, ryr1b, megf10, unc45b
## tspan9a, kcnma1a, rc3h2, txlnba, kcnq5a, hsp90aa1.1, ldb3a, plxna2, mybphb, thrap3a
## smyd1b, nexn, svila, cdh15, srl, musk, fras1, eya1, tnni2b.1, plecb
## Negative: ncam1a, tenm4, nova2, rnf220a, sema5ba, rfx4, cadm1a, cntfr, ncam1b, negr1
## epha4b, plp1a, ptprnb, msi1, zbtb16b, cdh7a, adgrv1, efna2a, pcdh19, pvrl2l
## her4.2, sox13, jag2b, pax3a, zbtb16a, plch1, dacha, pax6a, chl1a, ptprn2
## PC_ 2
## Positive: plekha6, lama5, pls3, camk2d1, ank3a, cdh1, col14a1a, tp63, myh9a, tfap2c
## palm3, slit2, actn1, col7a1, flrt2, pawr, sept12, niban2a, slit3, dock5
## fermt1, iqgap1, pcdh7b, myh9b, lamb1a, ppap2d, wu:fi36a10, arid3c, ahnak, bnc2
## Negative: ttn.1, ttn.2, ncam1a, si:ch1073-268j14.1, rbfox1l, ryr1b, rbm24a, mef2d, unc45b, rc3h2
## hsp90aa1.1, megf10, nova2, enah, ptprua, flncb, txlnba, tenm4, kcnma1a, tspan9a
## smyd1b, gli2a, srl, cdh15, musk, mybphb, myog, kcnq5b, kcnq5a, nexn
## PC_ 3
## Positive: evpla, anxa1c, mboat1, pard6a, eppk1, krt4, si:cabz01007794.1, ppl, krt5, noxo1a
## znf185, agr1, gbgt1l4, myh14, zgc:100868, ppp2r5a, elovl7a, hsd17b12a, dhrs11a, zgc:194679
## grhl1, spaca4l, spint2, cldne, si:dkey-87o1.2, krt92, mctp2a, si:ch211-195b11.3, si:ch211-79k12.1, si:ch211-157c3.4
## Negative: itga9, ebf3a, col5a1, pdgfra, fgfrl1a, nrp2b, alcama, adamts6, lsp1a, adgra2
## colec12, slit3, si:dkey-261h17.1, eya1, serpinh1b, emp2, meis1a, anks1b, p3h2, col12a1b
## bnc2, lamb1a, nectin1b, sept12, lpar1, crim1, meis3, ror1, gse1, postnb
## PC_ 4
## Positive: col14a1a, col18a1a, col7a1, tp63, lama5, col28a2a, pcdh19, rpe65b, actn1, dock5
## myh9a, bcam, dag1, tfap2c, fermt1, zgc:171775, ptk2bb, cldni, col4a6, hs6st1a
## itga3b, bcar3, palm3, ank3a, tiam1b, arhgef25b, flna, nrg2a, ppap2d, padi2
## Negative: ebf3a, itga9, fgfrl1a, col5a1, colec12, kif26ba, nrg1, pcdh1b, dnah5, tanc2a
## cfap57, kirrel1b, pdgfra, ism2a, gfra1a, cdh11, cadm4, lpar1, si:ch211-204c21.1.1, ptprga
## etnk1, syne1b, eya1, dnah2, rbm47, ebf3a.1, foxp1b, zfpm1, alcama, col5a2a
## PC_ 5
## Positive: dnah5, cfap57, dnah2, syne1b, nfasca, pacrg, CCDC39, dnah6, cfap45, crocc2
## zbbx, rnf220a, wu:fi36a10, cfap58, armc4, hydin, chl1a, foxj1a, dnah7, cacna1db
## prickle2b, armc3, spata17, lrriq1, ttc12, whrna, bcam, col11a1a, kif6, lrrc9
## Negative: itga9, pax3a, pdgfra, fgfrl1a, kirrel3l, emp2, lpar1, pax7a, comp, colec12
## postnb, si:dkey-30e9.7, lef1, meox1, tgfbi, prex2, rdh10a, nr2f5, zcchc24, gli2a
## adgra2, aldh1a2, nectin1b, pax7b, meis3, zgc:158328, cast, pleca, ahnak, snai1b
## 11:16:37 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:16:37 Read 1750 rows and found 50 numeric columns
## 11:16:37 Using Annoy for neighbor search, n_neighbors = 30
## 11:16:37 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:16:37 Writing NN index file to temp file /var/folders/85/cmygnxr155gdnf4bc3_30jyh0000gp/T//RtmpC4xO1n/file10e541178eac5
## 11:16:37 Searching Annoy index using 1 thread, search_k = 3000
## 11:16:38 Annoy recall = 100%
## 11:16:39 Commencing smooth kNN distance calibration using 1 thread
## 11:16:42 Initializing from normalized Laplacian + noise
## 11:16:42 Commencing optimization for 500 epochs, with 60272 positive edges
## 11:16:46 Optimization finished
## Performing TF-IDF normalization
## Warning in RunTFIDF.default(object = GetAssayData(object = object, slot =
## "counts"), : Some features contain 0 total counts
## Running SVD
## Scaling cell embeddings
## 11:17:12 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:17:12 Read 1750 rows and found 49 numeric columns
## 11:17:12 Using Annoy for neighbor search, n_neighbors = 30
## 11:17:12 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:17:12 Writing NN index file to temp file /var/folders/85/cmygnxr155gdnf4bc3_30jyh0000gp/T//RtmpC4xO1n/file10e54197d3479
## 11:17:12 Searching Annoy index using 1 thread, search_k = 3000
## 11:17:12 Annoy recall = 100%
## 11:17:14 Commencing smooth kNN distance calibration using 1 thread
## 11:17:16 Initializing from normalized Laplacian + noise
## 11:17:16 Commencing optimization for 500 epochs, with 66060 positive edges
## 11:17:20 Optimization finished
## Calculating cell-specific modality weights
## Finding 20 nearest neighbors for each modality.
## Calculating kernel bandwidths
## Finding multimodal neighbors
## Constructing multimodal KNN graph
## Constructing multimodal SNN graph
## 11:17:24 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:17:25 Commencing smooth kNN distance calibration using 1 thread
## 11:17:28 Initializing from normalized Laplacian + noise
## 11:17:28 Commencing optimization for 500 epochs, with 46012 positive edges
## 11:17:31 Optimization finished
HB13hpf_DEAB <- GetUMAPandClusters(HB13hpf_DEAB)
## PC_ 1
## Positive: lama5, bcam, plekha6, pls3, tfap2c, arid3c, fn1a, tp63, camk2d1, pdzrn3b
## fermt1, myh9a, znf385a, rpe65b, cdh1, palm3, dock5, fat2, actn1, lamb1a
## rarga, ahnak, src, col14a1a, bcar3, itga3b, tiam1b, adamts6, col11a1a, pawr
## Negative: ncam1a, cntfr, nova2, zbtb16a, jag2b, rfx4, her4.2, sox13, plch1, adgrv1
## chl1b, lrp2a, efna2a, rnf220a, gli3, ptprn2, pax6a, pax3a, cdh7a, ntn1a
## ptprnb, greb1l, hs3st3b1b, ntn1b, cadm1a, tenm4, epha4b, msi1, tet3, pax2a
## PC_ 2
## Positive: ttn.2, ttn.1, plxna2, rbm24a, enah, fgfrl1a, col5a2a, col4a2, ripply1, unc45b
## efemp2a, efemp2b, flncb, itga9, aldh1a2, sema3ab, emp2, lhfpl6, lpar1, samd10b
## thrap3a, rbfox1l, rdh10a, pde4cb, thbs4b, megf10, col4a1, si:dkey-261h17.1, qkia, fn1b
## Negative: pcdh19, plekha6, col18a1a, tfap2c, lama5, tp63, pls3, camk2d1, myh9a, fermt1
## col14a1a, rpe65b, palm3, cdh1, dock5, ppap2d, bcar3, fn1a, sept12, dag1
## itga3b, pawr, niban2a, apoeb, znf385a, myo9b, cnksr1, smad6b, sox6, actn1
## PC_ 3
## Positive: slit3, si:dkey-22o22.2, mtss1lb, col11a1a, col2a1a, col5a3a, loxl3b, si:dkey-261h17.1, p4ha1b, caskin1
## cacna1aa, col9a2, lrch2, zmp:0000000760, col9a3, col8a1a, serpinh1b, kank1a, ngef, plod1a
## ntd5, arhgap24, epha4a, slit1b, si:ch211-204c21.1.1, tmem163b, znf385b, galnt16, col9a1b, adgrb1a
## Negative: ttn.1, ttn.2, col5a2a, fat2, efemp2b, col4a2, nectin1b, mllt3, src, enah
## XKR4, sema3ab, aldh1a2, samd10b, unc45b, cpa6, gli2a, col18a1a, fn1b, efemp2a
## si:ch211-246m6.5.1, pde4cb, pcdh19, flncb, arid3c, inpp4b, rdh10a, phc2a, rbm24a, thbs4b
## PC_ 4
## Positive: ebf3a, itga9, snai1b, eya2, lsp1a, zcchc24, ebf3a.1, pdgfra, colec12, eya1
## tbx1, cxcl12b, col15a1b, tgfbr3, alcama, cdh11, adamts6, scube3, pdgfrb, kcnab1a
## pknox2, prex2, thbs3b, kirrel1b, rarga, scube1, btbd11a, hmcn2.1, adgra2, cbfb
## Negative: bcam, pcdh19, lama5, ncam1a, dag1, cntfr, cadm1a, chl1b, epha4a, fstl1b
## lrig1, ddr1, bcar3, prickle2b, hspg2, rnf220a, caskin1, ntn1b, her4.2, adamts3
## bmpr1ba, src, col2a1a, dab2ipb, ttn.1, ptk2ab, nedd9, epha2a, kank1a, plod2
## PC_ 5
## Positive: cdh11, ntn1a, rnf220a, ntn1b, chl1b, scube3, dnah5, cfap57, eya2, adamts3
## ptch2, cfap45, dnah2, armc4, cxcl12b, lsp1a, syne1b, chl1a, ebf3a.1, tbx1
## rassf7a, eya1, epha4b, foxj1a, pappab, plxna2, pdgfrb, adamts6, dnah6, colec12
## Negative: pax3a, kcnab1a, prex2, col15a1b, sox6, slc1a3a, pdgfra, cdon, snai1b, sox10
## sox5, itga9, zbtb16a, cyth1b, si:dkey-22o22.2, lef1, sox9b, atp1b3a, tfap2a, wnt3
## klf7a, tle3b, thsd7aa, sdk2b, nrp2a, cxxc5a, cachd1, msx1b, zfhx4, col16a1
## 11:18:17 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:18:17 Read 2423 rows and found 50 numeric columns
## 11:18:17 Using Annoy for neighbor search, n_neighbors = 30
## 11:18:17 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:18:17 Writing NN index file to temp file /var/folders/85/cmygnxr155gdnf4bc3_30jyh0000gp/T//RtmpC4xO1n/file10e544d421f88
## 11:18:17 Searching Annoy index using 1 thread, search_k = 3000
## 11:18:18 Annoy recall = 100%
## 11:18:19 Commencing smooth kNN distance calibration using 1 thread
## 11:18:22 Initializing from normalized Laplacian + noise
## 11:18:22 Commencing optimization for 500 epochs, with 88110 positive edges
## 11:18:27 Optimization finished
## Performing TF-IDF normalization
## Warning in RunTFIDF.default(object = GetAssayData(object = object, slot =
## "counts"), : Some features contain 0 total counts
## Running SVD
## Scaling cell embeddings
## 11:18:54 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:18:54 Read 2423 rows and found 49 numeric columns
## 11:18:54 Using Annoy for neighbor search, n_neighbors = 30
## 11:18:54 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:18:54 Writing NN index file to temp file /var/folders/85/cmygnxr155gdnf4bc3_30jyh0000gp/T//RtmpC4xO1n/file10e5430244158
## 11:18:54 Searching Annoy index using 1 thread, search_k = 3000
## 11:18:54 Annoy recall = 100%
## 11:18:56 Commencing smooth kNN distance calibration using 1 thread
## 11:18:59 Initializing from normalized Laplacian + noise
## 11:18:59 Commencing optimization for 500 epochs, with 96052 positive edges
## 11:19:04 Optimization finished
## Calculating cell-specific modality weights
## Finding 20 nearest neighbors for each modality.
## Calculating kernel bandwidths
## Finding multimodal neighbors
## Constructing multimodal KNN graph
## Constructing multimodal SNN graph
## 11:19:09 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:19:11 Commencing smooth kNN distance calibration using 1 thread
## 11:19:14 Initializing from normalized Laplacian + noise
## 11:19:14 Commencing optimization for 500 epochs, with 66450 positive edges
## 11:19:19 Optimization finished
# 8 Add Motifs
# Get a list of motif position frequency matrices from the JASPAR database
pfm <- getMatrixSet(
x = JASPAR2020,
opts = list(collection="CORE", all_versions = FALSE)
)
DefaultAssay(HB10hpf) <- "peaks"
HB10hpf <- RegionStats(HB10hpf, genome = BSgenome.Drerio.UCSC.danRer11, assay = "peaks")
HB10hpf <- AddMotifs(
object = HB10hpf,
genome = BSgenome.Drerio.UCSC.danRer11,
pfm = pfm
)
## Building motif matrix
## Finding motif positions
## Creating Motif object
DefaultAssay(HB13hpf) <- "peaks"
HB13hpf <- RegionStats(HB13hpf, genome = BSgenome.Drerio.UCSC.danRer11, assay = "peaks")
HB13hpf <- AddMotifs(
object = HB13hpf,
genome = BSgenome.Drerio.UCSC.danRer11,
pfm = pfm
)
## Building motif matrix
## Finding motif positions
## Creating Motif object
DefaultAssay(HB16hpf) <- "peaks"
HB16hpf <- RegionStats(HB16hpf, genome = BSgenome.Drerio.UCSC.danRer11, assay = "peaks")
HB16hpf <- AddMotifs(
object = HB16hpf,
genome = BSgenome.Drerio.UCSC.danRer11,
pfm = pfm
)
## Building motif matrix
## Finding motif positions
## Creating Motif object
DefaultAssay(HB13hpf_DEAB) <- "peaks"
HB13hpf_DEAB <- RegionStats(HB13hpf_DEAB, genome = BSgenome.Drerio.UCSC.danRer11, assay = "peaks")
HB13hpf_DEAB <- AddMotifs(
object = HB13hpf_DEAB,
genome = BSgenome.Drerio.UCSC.danRer11,
pfm = pfm
)
## Building motif matrix
## Finding motif positions
## Creating Motif object
saveRDS(HB10hpf, file = "RDSfiles/HB10hpf.clustered.RDS")
saveRDS(HB13hpf, file = "RDSfiles/HB13hpf.clustered.RDS")
saveRDS(HB16hpf, file = "RDSfiles/HB16hpf.clustered.RDS")
saveRDS(HB13hpf_DEAB, file = "RDSfiles/HB13hpf_DEAB.clustered.RDS")
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ensembldb_2.18.0 AnnotationFilter_1.18.0
## [3] GenomicFeatures_1.46.1 motifmatchr_1.16.0
## [5] TFBSTools_1.32.0 JASPAR2020_0.99.10
## [7] patchwork_1.1.1 reticulate_1.22
## [9] AnnotationHub_3.2.0 BiocFileCache_2.2.0
## [11] dbplyr_2.1.1 limma_3.50.3
## [13] ggsci_2.9 ggplot2_3.3.5
## [15] Signac_1.2.1 BSgenome.Drerio.UCSC.danRer11_1.4.2
## [17] BSgenome_1.62.0 rtracklayer_1.54.0
## [19] Biostrings_2.62.0 XVector_0.34.0
## [21] GenomicRanges_1.46.0 GenomeInfoDb_1.30.0
## [23] org.Dr.eg.db_3.14.0 AnnotationDbi_1.56.1
## [25] IRanges_2.28.0 S4Vectors_0.32.4
## [27] Biobase_2.54.0 BiocGenerics_0.40.0
## [29] dplyr_1.0.7 SeuratObject_4.0.4
## [31] Seurat_4.0.1
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 SnowballC_0.7.0
## [3] scattermore_0.7 R.methodsS3_1.8.1
## [5] tidyr_1.1.4 bit64_4.0.5
## [7] knitr_1.36 irlba_2.3.3
## [9] DelayedArray_0.20.0 R.utils_2.11.0
## [11] data.table_1.14.2 rpart_4.1-15
## [13] KEGGREST_1.34.0 RCurl_1.98-1.5
## [15] generics_0.1.1 cowplot_1.1.1
## [17] RSQLite_2.2.8 RANN_2.6.1
## [19] future_1.26.1 bit_4.0.4
## [21] tzdb_0.2.0 spatstat.data_2.1-0
## [23] xml2_1.3.2 httpuv_1.6.3
## [25] isoband_0.2.5 SummarizedExperiment_1.24.0
## [27] assertthat_0.2.1 DirichletMultinomial_1.36.0
## [29] xfun_0.27 hms_1.1.1
## [31] jquerylib_0.1.4 evaluate_0.14
## [33] promises_1.2.0.1 fansi_0.5.0
## [35] restfulr_0.0.13 progress_1.2.2
## [37] caTools_1.18.2 igraph_1.2.8
## [39] DBI_1.1.1 htmlwidgets_1.5.4
## [41] sparsesvd_0.2 spatstat.geom_2.3-0
## [43] purrr_0.3.4 ellipsis_0.3.2
## [45] RSpectra_0.16-0 backports_1.3.0
## [47] annotate_1.72.0 biomaRt_2.50.0
## [49] deldir_1.0-6 MatrixGenerics_1.6.0
## [51] vctrs_0.4.1 ROCR_1.0-11
## [53] abind_1.4-5 cachem_1.0.6
## [55] withr_2.4.2 ggforce_0.3.3
## [57] checkmate_2.0.0 sctransform_0.3.3
## [59] GenomicAlignments_1.30.0 prettyunits_1.1.1
## [61] goftest_1.2-3 cluster_2.1.2
## [63] lazyeval_0.2.2 seqLogo_1.60.0
## [65] crayon_1.4.2 hdf5r_1.3.4
## [67] labeling_0.4.2 pkgconfig_2.0.3
## [69] slam_0.1-48 tweenr_1.0.2
## [71] nlme_3.1-153 ProtGenerics_1.26.0
## [73] nnet_7.3-16 rlang_1.0.3
## [75] globals_0.15.1 lifecycle_1.0.1
## [77] miniUI_0.1.1.1 filelock_1.0.2
## [79] dichromat_2.0-0 polyclip_1.10-0
## [81] matrixStats_0.61.0 lmtest_0.9-38
## [83] Matrix_1.3-4 ggseqlogo_0.1
## [85] zoo_1.8-9 base64enc_0.1-3
## [87] ggridges_0.5.3 png_0.1-7
## [89] viridisLite_0.4.0 rjson_0.2.20
## [91] bitops_1.0-7 R.oo_1.24.0
## [93] KernSmooth_2.23-20 blob_1.2.2
## [95] stringr_1.4.0 parallelly_1.32.0
## [97] jpeg_0.1-9 readr_2.0.2
## [99] CNEr_1.30.0 scales_1.1.1
## [101] memoise_2.0.0 magrittr_2.0.1
## [103] plyr_1.8.6 ica_1.0-2
## [105] zlibbioc_1.40.0 compiler_4.1.0
## [107] BiocIO_1.4.0 RColorBrewer_1.1-2
## [109] fitdistrplus_1.1-6 Rsamtools_2.10.0
## [111] cli_3.3.0 listenv_0.8.0
## [113] pbapply_1.5-0 htmlTable_2.3.0
## [115] Formula_1.2-4 MASS_7.3-54
## [117] mgcv_1.8-38 tidyselect_1.1.1
## [119] stringi_1.7.5 highr_0.9
## [121] yaml_2.2.1 latticeExtra_0.6-29
## [123] ggrepel_0.9.1 grid_4.1.0
## [125] VariantAnnotation_1.40.0 sass_0.4.0
## [127] fastmatch_1.1-3 tools_4.1.0
## [129] future.apply_1.8.1 parallel_4.1.0
## [131] rstudioapi_0.13 TFMPvalue_0.0.8
## [133] foreign_0.8-81 lsa_0.73.2
## [135] gridExtra_2.3 farver_2.1.0
## [137] Rtsne_0.15 digest_0.6.28
## [139] BiocManager_1.30.16 shiny_1.7.1
## [141] pracma_2.3.3 qlcMatrix_0.9.7
## [143] Rcpp_1.0.7 BiocVersion_3.14.0
## [145] later_1.3.0 RcppAnnoy_0.0.19
## [147] httr_1.4.2 biovizBase_1.42.0
## [149] colorspace_2.0-2 XML_3.99-0.8
## [151] tensor_1.5 splines_4.1.0
## [153] uwot_0.1.10 RcppRoll_0.3.0
## [155] spatstat.utils_2.2-0 plotly_4.10.0
## [157] xtable_1.8-4 jsonlite_1.7.2
## [159] poweRlaw_0.70.6 R6_2.5.1
## [161] Hmisc_4.6-0 pillar_1.6.4
## [163] htmltools_0.5.2 mime_0.12
## [165] glue_1.6.2 fastmap_1.1.0
## [167] BiocParallel_1.28.0 interactiveDisplayBase_1.32.0
## [169] codetools_0.2-18 utf8_1.2.2
## [171] lattice_0.20-45 bslib_0.3.1
## [173] spatstat.sparse_2.0-0 tibble_3.1.6
## [175] curl_4.3.2 leiden_0.3.9
## [177] gtools_3.9.2 GO.db_3.14.0
## [179] survival_3.2-13 rmarkdown_2.11
## [181] docopt_0.7.1 munsell_0.5.0
## [183] GenomeInfoDbData_1.2.7 reshape2_1.4.4
## [185] gtable_0.3.0 spatstat.core_2.3-0